/* 02B_EEHH_NEWVARS.do             KTS/DCC/NLB             yyyy-mm-dd:2026-02-04
----|----1----|----2----|----3----|----4----|----5----|----6----|----7----|----8

This do-file creates the data used in the paper "Estimating Intergenerational
Returns to Medical Care: New Evidence from At-Risk Newborns", written by
Damian Clarke, Nicolas Lillo Bustos and Kathya Tapia-Schythe.
In certain cases these results require the user-written commands labutil and
personage.
*/

* Start from an empty session, turn off output paging, and start timer 1:
clear all
set more off
timer on 1

*-------------------------------------------------------------------------------
*--- 1. Merge EEHH data to NAC data 
*-------------------------------------------------------------------------------

* Open the births (NAC) file:
use "${nac_original}_NOGLOSAS_NODUPS_NONAS.dta", clear

* Retain only the newborn ID, birth date, birth year, sex, and mother's ID:
keep ID_RECIEN_NACIDO FECHA_NACIMIENTO_SIF ANO_NAC SEXO ID_MADRE

* Prefix the birth-file variables with NAC_ so they cannot clash with the
* EEHH variables brought in by the merge:
foreach v in FECHA_NACIMIENTO_SIF ANO_NAC SEXO ID_MADRE {
	rename `v' NAC_`v'
}

* Copy the newborn ID into the key used by the EEHH file:
generate ID_PACIENTE = ID_RECIEN_NACIDO

* Attach every EEHH record of each newborn; births without any EEHH record
* are kept (master), EEHH records without a birth are discarded:
merge 1:m ID_PACIENTE using "${eehh_original}_NODUPS_NONAS.dta", ///
	keep(master match) generate(mrg_EEHH2NAC)

label variable mrg_EEHH2NAC "merge 1:m ID_PACIENTE using EEHH, keep(master match)"
note mrg_EEHH2NAC: merge variable created in do file EEHH_01_MERGE2NAC.do

* First and last EEHH year present in the merged data:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)

* Compress, label, sign, and save:
compress
label data "EEHH `=min_ano_eehh'-`=max_ano_eehh' en Chile (DEIS/MINSAL), -duplicados/NAs, -Solo IDs $nac_original"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "${eehh_original}_NODUPS_NONAS_SOLOIDNACs.dta", replace

*-------------------------------------------------------------------------------
*--- 2. Create new variables
*-------------------------------------------------------------------------------

* Reload the merged file written at the end of section 1:
use "${eehh_original}_NODUPS_NONAS_SOLOIDNACs.dta", clear


* IMPORTANT SCALARS:

* First and last birth (NAC) year:
summarize NAC_$byear_var
scalar define min_ano_nac = r(min)
scalar define max_ano_nac = r(max)

* First and last EEHH year:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)


* REDUCE DATASET:
* Temporary file that holds the births without any hospital record:
tempfile unmatched_nacids

* Set the unmatched NAC IDs aside (they are appended back afterwards):
preserve
keep if mrg_EEHH2NAC == 1
compress
save `unmatched_nacids', replace
restore

* Continue with matched NAC IDs only (ie NAC IDs with at least one hospital visit):
keep if mrg_EEHH2NAC == 3

* Normalise both diagnosis codes to upper case:
foreach d of varlist DIAG1 DIAG2 {
	replace `d' = strupper(`d')
}


* CREATE NEW VARIABLES:

* Create admission date. The admission day itself counts as day 1 of the
* stay, hence the "+ 1":
gen int fecha_ingreso = FECHA_EGRESO - DIAS_ESTADA + 1
format fecha_ingreso %td
label var fecha_ingreso "Admission to hospital date (FECHA_EGRESO - DIAS_ESTADA + 1)"

* Get admission order within patient. "track" gives tied dates the same rank;
* "unique" breaks ties arbitrarily so every visit gets a distinct rank:
sort ID_PACIENTE fecha_ingreso
by ID_PACIENTE: egen int inordent = rank(fecha_ingreso), track
label var inordent "Hospital visit order (by admission date, track)"
by ID_PACIENTE: egen int inordenu = rank(fecha_ingreso), unique
label var inordenu "Hospital visit order (by admission date, unique)"

* Get discharge order within patient.
* (The labels below previously targeted inordent/inordenu, which overwrote the
* admission-order labels and left the discharge-order variables unlabeled.)
by ID_PACIENTE: egen int outordent = rank(FECHA_EGRESO), track
label var outordent "Hospital visit order (by discharge date, track)"
by ID_PACIENTE: egen int outordenu = rank(FECHA_EGRESO), unique
label var outordenu "Hospital visit order (by discharge date, unique)"

* Get total number of admissions:
by ID_PACIENTE: egen int innvsts = count(fecha_ingreso)
label var innvsts "Total number of hospital admissions (non-missing admissions date)"

* Get total number of discharges:
by ID_PACIENTE: egen int outnvsts = count(FECHA_EGRESO)
label var outnvsts "Total number of hospital discharges (non-missing discharge date)"

* Tag discharges that have the same admission date:
by ID_PACIENTE fecha_ingreso: egen n_dis_by_admdate = count(FECHA_EGRESO)
label var n_dis_by_admdate "Number of discharge dates with the same admission date"

* Tag admissions that have the same discharge date:
bys ID_PACIENTE FECHA_EGRESO: egen n_adm_by_disdate = count(fecha_ingreso)
label var n_adm_by_disdate "Number of admission dates with the same discharge date"

* Tag one observation by ID (tag_idp is 1 for exactly one row per patient and
* 0 for the rest):
egen byte tag_idp = tag(ID_PACIENTE)

* Convert patient id to numeric (consecutive integer codes, one per distinct
* ID_PACIENTE), needed because xtset requires a numeric panel variable:
egen long idp = group(ID_PACIENTE)

* Sort and declare panel: patient is the panel unit and the unique admission
* order is the "time" variable, so L1. below means "the patient's previous visit":
sort idp inordenu
xtset idp inordenu

* Tag overlapping visits: a visit overlaps when it is admitted strictly before
* the previous visit (in admission order) was discharged. The first visit of
* each patient has no predecessor and is set to 0.
* NOTE(review): "inordenu > 1" is also true when inordenu is missing, so rows
* with a missing admission order are evaluated here too -- confirm intended.
gen byte olap = fecha_ingreso < L1.FECHA_EGRESO if inordenu > 1
replace olap = 0 if inordenu == 1
label var olap "Current visit overlaps with previous."


* Get patient age at time of admission (using birth date from births database).
* personage (user-written) returns, in order: completed years, days since the
* last birthday, and the length in days of the current year of age:
personage NAC_FECHA_NACIMIENTO_SIF fecha_ingreso, gen(edad_ingreso dias_ingreso dias_anoedad_ingreso)
label var edad_ingreso "Age at time of admission (years)"
label var dias_ingreso "Age at time of admission (days since last birthday)"
label var dias_anoedad_ingreso "Number of days in the year that ends with the next birthday (admission)"
* Fill the year length with 365 where it is still missing for 29-February births:
replace dias_anoedad_ingreso = 365 if dias_anoedad_ingreso == . & month(NAC_FECHA_NACIMIENTO_SIF) == 2 & day(NAC_FECHA_NACIMIENTO_SIF) == 29

* Get patient age at time of discharge (using birth date from births database):
personage NAC_FECHA_NACIMIENTO_SIF FECHA_EGRESO, gen(edad_egreso dias_egreso dias_anoedad_egreso)
label var edad_egreso "Age at time of discharge (years)"
label var dias_egreso "Age at time of discharge (days since last birthday)"
label var dias_anoedad_egreso "Number of days in the year that ends with the next birthday (discharge)"
* Same 29-February fallback as for admission:
replace dias_anoedad_egreso = 365 if dias_anoedad_egreso == . & month(NAC_FECHA_NACIMIENTO_SIF) == 2 & day(NAC_FECHA_NACIMIENTO_SIF) == 29

* Months since birth: Admission.
* A year of age is split into 12 equal "months" of (year length / 12) days.
* Stored as int, not byte: a byte holds at most 100, so months since birth for
* anyone older than 8 years would silently become missing.
gen int meses_ingreso = floor(dias_ingreso / (dias_anoedad_ingreso / 12)) + 12 * edad_ingreso
label var meses_ingreso "Admission number of months since birth"

* Months since birth: Discharge (label previously overwrote meses_ingreso's):
gen int meses_egreso = floor(dias_egreso / (dias_anoedad_egreso / 12)) + 12 * edad_egreso
label var meses_egreso "Discharge number of months since birth"

* Difference in months between admission and discharge:
gen int diff_edad_meses = meses_egreso - meses_ingreso
label var diff_edad_meses "Difference in months between hospital admission and discharge"

* Difference in years between age at admission and discharge:
gen byte diff_edad_anos = edad_egreso - edad_ingreso
label var diff_edad_anos "Difference in years between hospital admission and discharge"

* Get patient age when EEHH dataset started (1 January of the first EEHH year):
personage NAC_FECHA_NACIMIENTO_SIF, ///
	currdate(`=mdy(1, 1, min_ano_eehh)') g(age_EEHH_start)
label var age_EEHH_start "Age at beginning of EEHH dataset"

* Get patient age when EEHH dataset ends (31 December of the last EEHH year):
personage NAC_FECHA_NACIMIENTO_SIF, ///
	currdate(`=mdy(12, 31, max_ano_eehh)') g(age_EEHH_end)
label var age_EEHH_end "Age at end of EEHH dataset"

* Replace negative age values as missing (dates that precede the birth date).
* The month and difference variables above are computed before this step and
* are not revisited here:
replace edad_ingreso = . if edad_ingreso < 0
replace dias_ingreso = . if dias_ingreso < 0
replace edad_egreso = . if edad_egreso < 0
replace dias_egreso = . if dias_egreso < 0
replace age_EEHH_start = . if age_EEHH_start < 0
replace age_EEHH_end = . if age_EEHH_end < 0


* CATEGORIZATIONS BY TYPE OF HEALTHCARE:

* Public/private hospital indicator: 1 when PERTENENCIA_SNSS is 2, 0 for any
* other non-missing value, missing when PERTENENCIA_SNSS is missing:
generate byte public_hosp = cond(PERTENENCIA_SNSS == 2, 1, 0) if PERTENENCIA_SNSS != .
label variable public_hosp "Private = 0 / Public = 1"

* ISAPRE coverage indicator: defined only for PREVISION codes up to 2 and for
* code 96; every other code (and missing) is left missing:
generate byte isapre = cond(PREVISION == 2, 1, 0) if !(PREVISION > 2 & PREVISION != 96)
label variable isapre "ISAPRE = 1, FONASA/NONE = 0, ARMED FORCES = Missing"


* CATEGORIZATIONS OF DELIVERIES:

/*
Birth-related ICD-10 chapters XV, XVI, and XVII
(first letter of the main diagnosis is O, P, or Q):
	codes O00-O99, P00-P96, Q00-Q99
*/
generate byte birth_codes = inlist(substr(DIAG1, 1, 1), "O", "P", "Q")
label variable birth_codes "ICD-10 codes related to birth and birth issues (Ch. XV/XVI/XVII)"

* Abortions (codes O00-O08): letter O and a number below 100 in the last three
* characters of the code:
generate byte abortion = real(substr(DIAG1, -3, 3)) < 100 & substr(DIAG1, 1, 1) == "O"
label variable abortion "Abortion related ICD-10 codes (O00-O08)"

/*
Reproductive health problems (not deliveries) that do not result in death:
   N70-N77	Inflammatory diseases of female pelvic organs
   N80-N98	Noninflammatory disorders of female genital tract
   O00-O08	Pregnancy with abortive outcome.
   O10-O16	Oedema, proteinuria and hypertensive disorders in pregnancy,
			childbirth and the puerperium.
   O20-O29	Other maternal disorders predominantly related to pregnancy.
   O94X		Sequelae of complication of pregnancy, childbirth and the puerperium.
   O99		Other maternal diseases classifiable elsewhere but complicating pregnancy and the puerperium.

The expression below flags: O codes whose two-digit category is below 30,
any O99 code, every code starting with N7 or N8, and N9 codes whose third
character is a digit below 9. O94X is listed above but is not flagged.
*/
generate byte rephlth = ///
	(substr(DIAG1, 1, 1) == "O" & real(substr(DIAG1, 2, 2)) < 30) ///
	| substr(DIAG1, 1, 3) == "O99" ///
	| inlist(substr(DIAG1, 1, 2), "N7", "N8") ///
	| (substr(DIAG1, 1, 2) == "N9" & real(substr(DIAG1, 3, 1)) < 9)
label variable rephlth "ICD-10 codes affecting reproductive health (N70-N77, N80-N98, O00-O29, O99)"

* All deliveries (O80-O84): code starts with "O8" and its third character is
* a digit from 0 to 4. Only the third character is tested so that categories
* coded without a subdivision (O80X, O81X, O82X, O83X, O84X) are included, in
* line with spont_dlvry/csection/asstd_dlvry below. The previous test read the
* last two characters as a number; "0X" converts to missing, and missing is
* not < 50, so those codes were dropped.
gen byte all_dlvrys = substr(DIAG1, 1, 2) == "O8" & inrange(real(substr(DIAG1, 3, 1)), 0, 4)
label var all_dlvrys "Delivery related ICD-10 codes (O80-O84)"

/*
Spontaneous deliveries:
   O80	Single spontaneous delivery
   O800	Single spontaneous delivery, vertex (cephalic) presentation
   O801	Single spontaneous delivery, breech presentation
   O808	Single spontaneous delivery, other presentations
   O809	Single spontaneous delivery, unspecified: spontaneous delivery NOS
   O840	Multiple delivery, all spontaneous
*/
generate byte spont_dlvry = inlist(DIAG1, "O80X", "O800", "O801", "O808", "O809", "O840")
label variable spont_dlvry "Spontaneous delivery: ICD Codes O80/O800/O801/O808/O809/O840"
/*
C-sections:
   O82  Single delivery by caesarean section
   O820 Delivery by elective caesarean section: repeat caesarean NOS
   O821 Delivery by emergency caesarean section
   O822 Delivery by caesarean hysterectomy
   O828 Other single delivery by caesarean section
   O829 Delivery by caesarean section, unspecified
   O842 Multiple delivery, all by caesarean section
   P034	Fetus and newborn affected by caesarean delivery

(Maybes, not included in the flag:
   O601 Preterm spontaneous labour with preterm delivery
			Preterm labour with delivery NOS
			Preterm spontaneous labour with preterm delivery by caesarean section
   O602 Preterm spontaneous labour with term delivery
			Preterm spontaneous labour with term delivery by caesarean section
   O603 Preterm delivery without spontaneous labour
			Preterm delivery by:
				caesarean section, without spontaneous labour
				induction
)
*/
generate byte csection = inlist(DIAG1, "O82X", "O820", "O821", "O822", ///
	"O828", "O829", "O842", "P034")
label variable csection "C-section: ICD Codes O82/O820/O821/O822/O828/O829/O842/P034"

/*
Assisted and forceps deliveries:
   O81	Single delivery by forceps and vacuum extractor
   O810	Low forceps delivery
   O811	Mid-cavity forceps delivery
   O812	Mid-cavity forceps with rotation
   O813	Other and unspecified forceps delivery
   O814	Vacuum extractor delivery
   O815	Delivery by combination of forceps and vacuum extractor
   O841	Multiple delivery, all by forceps and vacuum extractor
   O83	Other assisted single delivery
   O830 Breech extraction
   O831	Other assisted breech delivery: breech delivery NOS
   O832	Other manipulation-assisted delivery: version with extraction
   O833	Delivery of viable fetus in abdominal pregnancy
   O834	Destructive operation for delivery: cleidotomy, craniotomy, embryotomy
   O838	Other specified assisted single delivery
   O839	Assisted single delivery, unspecified: assisted delivery NOS

inlist() accepts at most 10 string arguments, so the 16 codes are tested in
two groups (forceps/vacuum codes, then other assisted codes).
*/
generate byte asstd_dlvry = ///
	inlist(DIAG1, "O81X", "O810", "O811", "O812", "O813", "O814", "O815", "O841") ///
	| inlist(DIAG1, "O83X", "O830", "O831", "O832", "O833", "O834", "O838", "O839")
label variable asstd_dlvry "Assisted delivery codes (not c-section)"


* Categorize diagnoses by ICD-10 chapter. The same rules are applied to the
* main diagnosis (DIAG1 -> cap_diag1) and the secondary one (DIAG2 -> cap_diag2).
*
* Chapters are decided by the first letter, except for letters D and H, which
* are split by the two-digit category number (characters 2-3):
*   D00-D48 vs D50-D89, and H00-H59 vs H60-H95.
* Only the two category digits are read. The previous version read characters
* 2-4 as a three-digit number; for codes without a subdivision (e.g. "D34X",
* "H46X") that conversion is missing, and because missing compares greater
* than any number those codes were all assigned to D50-D89 / H60-H95.
* D or H codes whose category is not numeric are now left without a chapter.
foreach d in 1 2 {
	* Expression text (substituted below), not stored values:
	local ltr substr(DIAG`d', 1, 1)
	local ctg real(substr(DIAG`d', 2, 2))

	gen cap_diag`d' = "A00-B99" if inlist(`ltr', "A", "B")
	replace cap_diag`d' = "C00-D48" if `ltr' == "C"
	replace cap_diag`d' = "C00-D48" if `ltr' == "D" & `ctg' < 50
	replace cap_diag`d' = "D50-D89" if `ltr' == "D" & `ctg' >= 50 & `ctg' < .
	replace cap_diag`d' = "E00-E90" if `ltr' == "E"
	replace cap_diag`d' = "F00-F99" if `ltr' == "F"
	replace cap_diag`d' = "G00-G99" if `ltr' == "G"
	replace cap_diag`d' = "H00-H59" if `ltr' == "H" & `ctg' < 60
	replace cap_diag`d' = "H60-H95" if `ltr' == "H" & `ctg' >= 60 & `ctg' < .
	replace cap_diag`d' = "I00-I99" if `ltr' == "I"
	replace cap_diag`d' = "J00-J99" if `ltr' == "J"
	replace cap_diag`d' = "K00-K93" if `ltr' == "K"
	replace cap_diag`d' = "L00-L99" if `ltr' == "L"
	replace cap_diag`d' = "M00-M99" if `ltr' == "M"
	replace cap_diag`d' = "N00-N99" if `ltr' == "N"
	replace cap_diag`d' = "O00-O99" if `ltr' == "O"
	replace cap_diag`d' = "P00-P96" if `ltr' == "P"
	replace cap_diag`d' = "Q00-Q99" if `ltr' == "Q"
	replace cap_diag`d' = "R00-R99" if `ltr' == "R"
	replace cap_diag`d' = "S00-T98" if inlist(`ltr', "S", "T")
	replace cap_diag`d' = "V00-Y98" if inlist(`ltr', "V", "W", "X", "Y")
	replace cap_diag`d' = "Z00-Z99" if `ltr' == "Z"
	replace cap_diag`d' = "U00-U99" if `ltr' == "U"

	label var cap_diag`d' "ICD-10 Chapter for DIAG`d'"
}



/** Chronic diseases:
gen byte chronic = 0
do "$dodir/Processing/chronic.do"
label var chronic "Chronic disease"
*/

* Non-external conditions: 1 unless the DIAG1 chapter is one of the seven
* excluded chapters (rows with no chapter assigned therefore get 1):
generate byte notext = !inlist(cap_diag1, "S00-T98", "V00-Y98", "Z00-Z99", ///
	"U00-U99", "O00-O99", "P00-P96", "Q00-Q99")
label variable notext "Non-external ICD-10 codes"
note notext: Excludes ICD-10 chapters XV, XVI, XVII, XIX, XX, XXI, and XXII.


* APPEND BACK UNMATCHED NAC IDs (births without any hospital record):
append using `unmatched_nacids'

* Turn the merge code into a 0/1 flag (1 = row is a hospital visit):
generate byte visit = (mrg_EEHH2NAC == 3)
label variable visit "Observation is a hospital visit"


* Compress, sort, label, sign, and save:
compress
sort ID_PACIENTE fecha_ingreso
label data "EEHH `=min_ano_eehh'-`=max_ano_eehh', -duplicados/NAs, Solo IDs $nac_original, +nuevas variables"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "${eehh_original}_NODUPS_NONAS_SOLOIDNACs_NEWVARS.dta", replace

*-------------------------------------------------------------------------------
*--- 3. Create days spent in hospital 
*-------------------------------------------------------------------------------

*-------------------------
* 3.1. Yearly
*-------------------------

* Open the visit-level file with the new variables:
use "${eehh_original}_NODUPS_NONAS_SOLOIDNACs_NEWVARS.dta", clear


* IMPORTANT SCALARS:

* First and last birth (NAC) year:
summarize NAC_$byear_var
scalar define min_ano_nac = r(min)
scalar define max_ano_nac = r(max)

* First and last EEHH year:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)


* REDUCE DATASET:
* Temporary file for the births without any hospital record:
tempfile unmatched_nacids

* Set the unmatched NAC IDs aside (appended back further down):
preserve
keep if mrg_EEHH2NAC == 1
save `unmatched_nacids', replace
restore

* Work on matched NAC IDs only (ie NAC IDs with at least one hospital visit):
keep if mrg_EEHH2NAC == 3


* One "days in hospital" variable per year of age, from 0 up to the oldest
* age reached at the end of the EEHH data. Stays that begin and end at the
* same age contribute their full length of stay:
summarize age_EEHH_end
local last_age = r(max)
forvalues y = 0/`last_age' {
	local yy : display %02.0f `y'
	generate int days_y`yy' = DIAS_ESTADA if edad_egreso == `y' & edad_ingreso == `y'
	label variable days_y`yy' "Days spent in hospital at age `y'"
}

* Stays that span one or more birthdays are spread over the ages they cover:
foreach dvar of varlist days_y?? {
	local yy = substr("`dvar'", 7, 2)
	local y = real("`yy'")

	* Age at admission: days from admission to the end of that year of age.
	replace `dvar' = dias_anoedad_ingreso - dias_ingreso + 1 ///
		if diff_edad_anos > 0 & edad_ingreso == `y' & edad_egreso == `y' + diff_edad_anos

	* Age at discharge: days elapsed since the last birthday.
	replace `dvar' = dias_egreso ///
		if diff_edad_anos > 0 & edad_egreso == `y' & edad_ingreso == `y' - diff_edad_anos

	* Ages strictly in between: a full year, only where still unfilled.
	replace `dvar' = 365 ///
		if diff_edad_anos > 0 & `dvar' == . & edad_ingreso < `y' & `y' < edad_egreso
}


* APPEND BACK UNMATCHED NAC IDs:
append using `unmatched_nacids'


* Per-row admission and discharge indicators by age. Rows with no days at
* that age (including all unmatched NAC IDs) get 0:
foreach dvar of varlist days_y?? {
	local yy = substr("`dvar'", 7, 2)
	local y = real("`yy'")

	generate byte nadmssn_y`yy' = edad_ingreso == `y' & `dvar' != .
	label variable nadmssn_y`yy' "Number of admissions to hospital that started at age `y'"

	generate byte ndischrg_y`yy' = edad_egreso == `y' & `dvar' != .
	label variable ndischrg_y`yy' "Number of discharges from hospital that occurred at age `y'"
}


* Compress, sort, label, sign, and save:
compress
sort ID_PACIENTE fecha_ingreso
label data "EEHH `=min_ano_eehh'-`=max_ano_eehh', visit days spread over years"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "${eehh_original}_DAYS_Y.dta", replace

*-------------------------
* 3.2. Monthly
*-------------------------

* Load data:
use "${eehh_original}_NODUPS_NONAS_SOLOIDNACs_NEWVARS.dta", clear


* CALCULATE IMPORTANT SCALARS:

* Get minimum and maximum NAC year:
sum NAC_$byear_var
scalar min_ano_nac = r(min)
scalar max_ano_nac = r(max)

* Get minimum and maximum EEHH year:
sum $eyear_var
scalar min_ano_eehh = r(min)
scalar max_ano_eehh = r(max)


* REDUCE DATASET FOR FASTER EXECUTION:
* local macros for temporary files:
tempfile unmatched_nacids non_infant_admissions

* Save list of unmatched NAC IDs (to be appended back afterwards):
preserve
keep if mrg_EEHH2NAC == 1
save `unmatched_nacids', replace
restore

* Keep only matched NAC IDs (ie NAC IDs that visited a hospital at least once):
keep if mrg_EEHH2NAC == 3

* Save list of observations of non infant admissions:
preserve
keep if edad_ingreso >= 1
save `non_infant_admissions', replace
restore

* Keep only infant admissions:
keep if edad_ingreso == 0


* Create variables for days spent in hospital by month in the first year and 
* fill in values for visits starting and ending during the same age in months:
forval m = 0(1)11 {
	local mm = string(`m', "%02.0f")
	gen byte days_m`mm' = DIAS_ESTADA if meses_ingreso == `m' & meses_egreso == `m'
	label var days_m`mm' "Number of hospital days spent during month `m' of life"
}

* 1 or more months difference between age at admission and discharge.
* A "month" here is floor(year length / 12) days long.
foreach var of varlist days_m?? {
	local mm = subinstr("`var'", "days_m", "", 1)
	local m = real("`mm'")
	
	* First month: days from admission to the end of the admission month.
	* The second condition must test the DISCHARGE month. It previously tested
	* meses_ingreso twice, which cannot hold together with diff_edad_meses > 0,
	* so the admission month of every multi-month stay was left missing.
	replace `var' = floor((dias_anoedad_ingreso / 12)) - (dias_ingreso - meses_ingreso * floor((dias_anoedad_ingreso / 12))) + 1 ///
		if meses_ingreso == `m' & meses_egreso == `m' + diff_edad_meses & diff_edad_meses > 0
	
	* The month index is computed with the unrounded month length while the
	* formula above uses the floored one, so admissions in the last days of a
	* month can come out at 0 or below. The admission day itself is always
	* spent in hospital, hence a floor of 1 (missing values are untouched):
	replace `var' = 1 ///
		if `var' < 1 & meses_ingreso == `m' & diff_edad_meses > 0
	
	* Last month: days elapsed since the start of the discharge month.
	replace `var' = dias_egreso - meses_egreso * floor((dias_anoedad_egreso / 12)) ///
		if meses_ingreso == `m' - diff_edad_meses & meses_egreso == `m' & diff_edad_meses > 0
	
	* Middle months: a full month, only where still unfilled.
	replace `var' = floor((365 / 12)) ///
		if meses_ingreso < `m' & `m' < meses_egreso & `var' == . & diff_edad_meses > 0
		
	* Cut at 31 days:
	replace `var' = min(`var', 31) if `var' != .
}


* APPEND BACK DROPPED OBSERVATIONS:
append using `unmatched_nacids'
append using `non_infant_admissions'



* Calculate number of admissions and discharges by month 
* (valid for unmatched NAC IDs as well):
foreach var of varlist days_m?? {
	local mm = subinstr("`var'", "days_m", "", 1)
	local m = real("`mm'")
	
	gen byte nadmssn_m`mm' = `var' != . & meses_ingreso == `m'
	label var nadmssn_m`mm' "Number of admissions to hospital that started during month `m'"
	
	gen byte ndischrg_m`mm' = `var' != . & meses_egreso == `m'
	label var ndischrg_m`mm' "Number of discharges from hospital that occurred during month `m'"
}



* Compress, sort, label, sign, and save:
compress
sort ID_PACIENTE fecha_ingreso
label data "EEHH `=min_ano_eehh'-`=max_ano_eehh', visit days spread over months (for infant admissions)"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "${eehh_original}_DAYS_M.dta", replace

*-------------------------------------------------------------------------------
*--- 4. Aggregate by Age (in Years). ///
	   Do-Files "In Months" don't exist and aren't used in MERGE.
*-------------------------------------------------------------------------------

*-------------------------
* 4.1 ALL
*-------------------------


* Open the visit-level file with days spread over years of age:
use "${eehh_original}_DAYS_Y.dta", clear


* IMPORTANT SCALARS:

* First and last birth (NAC) year:
summarize NAC_$byear_var
scalar define min_ano_nac = r(min)
scalar define max_ano_nac = r(max)

* First and last EEHH year:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)


* collapse drops variable labels, so remember them first:
local aggvars days_y?? nadmssn_y?? ndischrg_y??
foreach v of varlist `aggvars' {
	local `v'_lbl : variable label `v'
}

* One row per newborn: totals of days, admissions, discharges and visits:
collapse (sum) `aggvars' total_discharges = visit, ///
	by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Put the labels back:
foreach v of varlist `aggvars' {
	label variable `v' "``v'_lbl'"
}
label variable total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh'"

* Per age: clean the day totals, then blank out ages outside the EEHH window.
foreach dvar of varlist days_y?? {
	local yy = substr("`dvar'", 7, 2)
	local y = real("`yy'")

	* A total of 0 days means "no stay at this age"; totals are capped at 365.
	replace `dvar' = . if `dvar' == 0
	replace `dvar' = min(`dvar', 365) if `dvar' != .

	* Ages reached before the first EEHH year are not covered by the data.
	foreach stub in days_y nadmssn_y ndischrg_y {
		replace `stub'`yy' = . if NAC_ANO_NAC + `y' < min_ano_eehh
	}

	* Ages not yet reached by the last EEHH year.
	foreach stub in days_y nadmssn_y ndischrg_y {
		replace `stub'`yy' = . if NAC_ANO_NAC + `y' > max_ano_eehh
	}
}


* Bring in the year of death, where there is one:
generate ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
	keep(master match) keepusing(ANO_DEF) generate(mrg_DEF2NAC)

label variable mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"

* Ages that a deceased person never reached are set to missing:
foreach dvar of varlist days_y?? {
	local yy = substr("`dvar'", 7, 2)
	local y = real("`yy'")

	foreach stub in days_y nadmssn_y ndischrg_y {
		replace `stub'`yy' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	}
}



* Compress, sort, label, sign, and save:
compress
sort ID_RECIEN_NACIDO
label data "Number of hospitalizations and days hospitalized, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_ALL.dta", replace

*-------------------------
* 4.2 PRIVATE
*-------------------------

* Load data:
use "${eehh_original}_DAYS_Y.dta", clear


* CALCULATE IMPORTANT SCALARS:

* Get minimum and maximum NAC year:
sum NAC_$byear_var
scalar min_ano_nac = r(min)
scalar max_ano_nac = r(max)

* Get minimum and maximum EEHH year:
sum $eyear_var
scalar min_ano_eehh = r(min)
scalar max_ano_eehh = r(max)


* FILTER DATASET: only private hospitals
* Every row that is not known to be private (public_hosp == 1 or missing) is
* neutralised, mirroring how the ISAPRE aggregation treats a missing isapre.
* Rows of unmatched NAC IDs already carry no days, no counts and visit == 0,
* so they are unaffected.
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	
	replace `var' = . if public_hosp != 0
	replace nadmssn_y`yy' = 0 if public_hosp != 0
	replace ndischrg_y`yy' = 0 if public_hosp != 0
}

* The discharge total must count private-hospital visits only; without this
* the "Private" total equals the total over all hospitals:
replace visit = 0 if public_hosp != 0


* Store variable labels before collapse:
foreach var of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	local `var'_lbl : variable label `var'
}

* Collapse to one row per newborn:
collapse 	(sum) days_y?? nadmssn_y?? ndischrg_y?? ///
			(sum) total_discharges = visit, ///
			by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Re-apply labels:
foreach var of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	label var `var' "``var'_lbl': Private"
}
label var total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh': Private"

* Convert 0s to missing and cap at 365 days:
foreach var of varlist days_y?? {
	replace `var' = . if `var' == 0
	replace `var' = min(`var', 365) if `var' != .
}

* Convert days and admission/discharges variables to missing for ages not covered by EEHH database:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if NAC_ANO_NAC + `y' < min_ano_eehh
	replace nadmssn_y`yy' = . if NAC_ANO_NAC + `y' < min_ano_eehh
	replace ndischrg_y`yy' = . if NAC_ANO_NAC + `y' < min_ano_eehh
}

* Convert days and admission/discharges variables to missing for ages unattained by end of EEHH database:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if NAC_ANO_NAC + `y' > max_ano_eehh
	replace nadmssn_y`yy' = . if NAC_ANO_NAC + `y' > max_ano_eehh
	replace ndischrg_y`yy' = . if NAC_ANO_NAC + `y' > max_ano_eehh
}


* Merge in mortality:
gen ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
	gen(mrg_DEF2NAC) keep(master match) keepusing(ANO_DEF)

label var mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"
	
* Convert days and admission/discharges variables to missing for ages unattained by dead people:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	replace nadmssn_y`yy' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	replace ndischrg_y`yy' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
}

* Rename variables to identify as private:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	
	rename `var' `var'_priv
	rename nadmssn_y`yy' nadmssn_y`yy'_priv
	rename ndischrg_y`yy' ndischrg_y`yy'_priv	
}
rename total_discharges total_discharges_priv


* Compress, sort, label, sign, and save:
compress
sort ID_RECIEN_NACIDO
label data "Number of hospitalizations and days hospitalized in private hospitals, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_PRIVATE.dta", replace



*-------------------------
* 4.3 PUBLIC
*-------------------------

* Load data:
use "${eehh_original}_DAYS_Y.dta", clear


* CALCULATE IMPORTANT SCALARS:

* Get minimum and maximum NAC year:
sum NAC_$byear_var
scalar min_ano_nac = r(min)
scalar max_ano_nac = r(max)

* Get minimum and maximum EEHH year:
sum $eyear_var
scalar min_ano_eehh = r(min)
scalar max_ano_eehh = r(max)


* FILTER DATASET: only public hospitals
* Every row that is not known to be public (public_hosp == 0 or missing) is
* neutralised, mirroring how the ISAPRE aggregation treats a missing isapre.
* Rows of unmatched NAC IDs already carry no days, no counts and visit == 0,
* so they are unaffected.
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	
	replace `var' = . if public_hosp != 1
	replace nadmssn_y`yy' = 0 if public_hosp != 1
	replace ndischrg_y`yy' = 0 if public_hosp != 1
}

* The discharge total must count public-hospital visits only; without this
* the "Public" total equals the total over all hospitals:
replace visit = 0 if public_hosp != 1


* Store variable labels before collapse:
foreach var of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	local `var'_lbl : variable label `var'
}

* Collapse to one row per newborn:
collapse 	(sum) days_y?? nadmssn_y?? ndischrg_y?? ///
			(sum) total_discharges = visit, ///
			by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Re-apply labels:
foreach var of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	label var `var' "``var'_lbl': Public"
}
label var total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh': Public"

* Convert 0s to missing and cap at 365 days:
foreach var of varlist days_y?? {
	replace `var' = . if `var' == 0
	replace `var' = min(`var', 365) if `var' != .
}

* Convert days and admission/discharges variables to missing for ages not covered by EEHH database:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if NAC_ANO_NAC + `y' < min_ano_eehh
	replace nadmssn_y`yy' = . if NAC_ANO_NAC + `y' < min_ano_eehh
	replace ndischrg_y`yy' = . if NAC_ANO_NAC + `y' < min_ano_eehh
}

* Convert days and admission/discharges variables to missing for ages unattained by end of EEHH database:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if NAC_ANO_NAC + `y' > max_ano_eehh
	replace nadmssn_y`yy' = . if NAC_ANO_NAC + `y' > max_ano_eehh
	replace ndischrg_y`yy' = . if NAC_ANO_NAC + `y' > max_ano_eehh
}


* Merge in mortality:
gen ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
	gen(mrg_DEF2NAC) keep(master match) keepusing(ANO_DEF)

label var mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"
	
* Convert days and admission/discharges variables to missing for ages unattained by dead people:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	replace nadmssn_y`yy' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	replace ndischrg_y`yy' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
}

* Rename variables to identify as public:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	
	rename `var' `var'_pblc
	rename nadmssn_y`yy' nadmssn_y`yy'_pblc
	rename ndischrg_y`yy' ndischrg_y`yy'_pblc	
}
rename total_discharges total_discharges_pblc


* Compress, sort, label, sign, and save:
compress
sort ID_RECIEN_NACIDO
label data "Number of hospitalizations and days hospitalized in public hospitals, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_PUBLIC.dta", replace
*-------------------------
* 4.4 ISAPRE
*-------------------------

* Load data:
use "${eehh_original}_DAYS_Y.dta", clear


* CALCULATE IMPORTANT SCALARS:

* Get minimum and maximum NAC year:
sum NAC_$byear_var
scalar min_ano_nac = r(min)
scalar max_ano_nac = r(max)

* Get minimum and maximum EEHH year:
sum $eyear_var
scalar min_ano_eehh = r(min)
scalar max_ano_eehh = r(max)


* FILTER DATASET: only hospital visits covered by ISAPRE
* Rows that are not ISAPRE (isapre == 0) or have no ISAPRE flag (missing) are
* neutralised so they add nothing to the per-newborn totals.
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	
	replace `var' = . if isapre == 0 | isapre == .
	replace nadmssn_y`yy' = 0 if isapre == 0 | isapre == .
	replace ndischrg_y`yy' = 0 if isapre == 0 | isapre == .
}

* The discharge total must count ISAPRE visits only; without this the
* "ISAPRE" total equals the total over all visits:
replace visit = 0 if isapre == 0 | isapre == .


* Store variable labels before collapse:
foreach var of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	local `var'_lbl : variable label `var'
}

* Collapse to one row per newborn:
collapse 	(sum) days_y?? nadmssn_y?? ndischrg_y?? ///
			(sum) total_discharges = visit, ///
			by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Re-apply labels:
foreach var of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	label var `var' "``var'_lbl': ISAPRE"
}
label var total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh': ISAPRE"

* Convert 0s to missing and cap at 365 days:
foreach var of varlist days_y?? {
	replace `var' = . if `var' == 0
	replace `var' = min(`var', 365) if `var' != .
}

* Convert days and admission/discharges variables to missing for ages not covered by EEHH database:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if NAC_ANO_NAC + `y' < min_ano_eehh
	replace nadmssn_y`yy' = . if NAC_ANO_NAC + `y' < min_ano_eehh
	replace ndischrg_y`yy' = . if NAC_ANO_NAC + `y' < min_ano_eehh
}
* Convert days and admission/discharges variables to missing for ages unattained by end of EEHH database:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if NAC_ANO_NAC + `y' > max_ano_eehh
	replace nadmssn_y`yy' = . if NAC_ANO_NAC + `y' > max_ano_eehh
	replace ndischrg_y`yy' = . if NAC_ANO_NAC + `y' > max_ano_eehh
}


* Merge in mortality:
gen ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
	gen(mrg_DEF2NAC) keep(master match) keepusing(ANO_DEF)

label var mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"
	
* Convert days and admission/discharges variables to missing for ages unattained by dead people:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	replace `var' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	replace nadmssn_y`yy' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	replace ndischrg_y`yy' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
}

* Rename variables to identify as private:
foreach var of varlist days_y?? {
	local yy = subinstr("`var'", "days_y", "", 1)
	local y = real("`yy'")
	
	rename `var' `var'_ispr
	rename nadmssn_y`yy' nadmssn_y`yy'_ispr
	rename ndischrg_y`yy' ndischrg_y`yy'_ispr	
}
rename total_discharges total_discharges_ispr


* Compress, sort, label, sign, and save:
compress
sort ID_RECIEN_NACIDO
label data "Number of hospitalizations and days hospitalized covered by ISAPRE, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_ISAPRE.dta", replace

*-------------------------
* 4.5 BCODES
*-------------------------
* Aggregates the record-level DAYS_Y file to one row per by-group for rows
* with NAC_SEXO == 2, counting only records whose birth_codes flag is neither
* 0 nor missing. Every other newborn is appended back at the end with the
* _bcodes outcomes missing. Result is saved as DAYS_Y_AGG_BCODES.dta.

* Start from the record-level file:
use "${eehh_original}_DAYS_Y.dta", clear


* REFERENCE YEARS (stored as scalars):

* First and last birth year present in the data:
summarize NAC_$byear_var
scalar define min_ano_nac = r(min)
scalar define max_ano_nac = r(max)

* First and last EEHH year present in the data:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)

* RESTRICT TO WOMEN (NAC_SEXO == 2)

* Temporary file holding everybody else:
tempfile non_women

* Set aside one identifying row for everyone whose NAC_SEXO is not 2
* (this includes missing NAC_SEXO); they are appended back further down:
preserve
drop if NAC_SEXO == 2
keep ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC
duplicates drop
compress
save `non_women', replace
restore

* Continue with women only:
drop if NAC_SEXO != 2

* On rows whose birth_codes flag is 0 or missing, days become missing and
* admission/discharge counts become 0, so they add nothing to the sums below:
local not_bcode "birth_codes == 0 | birth_codes == ."
foreach dvar of varlist days_y?? {
    local yy = subinstr("`dvar'", "days_y", "", 1)
    local y  = real("`yy'")

    replace `dvar' = . if `not_bcode'
    foreach cnt in nadmssn ndischrg {
        replace `cnt'_y`yy' = 0 if `not_bcode'
    }
}


* Stash the variable labels in locals so they can be re-applied once the
* data have been collapsed:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
    local `v'_lbl : variable label `v'
}

* Sum every outcome within each by-group.
* NOTE(review): visit is summed over all rows of the group, not only the
* birth-code ones, although the label below reads Birth Codes. Confirm this
* is intended.
collapse (sum) days_y?? nadmssn_y?? ndischrg_y?? total_discharges = visit, ///
    by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Restore the stored labels, flagged as Birth Codes:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
    label variable `v' "``v'_lbl': Birth Codes"
}
label variable total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh': Birth Codes"

* Days: a total of 0 becomes missing, anything else is capped at 365:
foreach dvar of varlist days_y?? {
    replace `dvar' = cond(`dvar' == 0, ., min(`dvar', 365)) if `dvar' != .
}

* Blank out ages outside the EEHH coverage window, i.e. birth year + age
* earlier than the first EEHH year or later than the last one.
* scalar() makes sure the scalars are read, never a like-named variable:
foreach dvar of varlist days_y?? {
    local yy = subinstr("`dvar'", "days_y", "", 1)
    local y  = real("`yy'")

    foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
        replace `v' = . if NAC_ANO_NAC + `y' < scalar(min_ano_eehh)
        replace `v' = . if NAC_ANO_NAC + `y' > scalar(max_ano_eehh)
    }
}

* Bring in the year of death (ANO_DEF) from the mortality file; rows found
* only in the mortality file are not kept:
generate ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
    keepusing(ANO_DEF) keep(master match) generate(mrg_DEF2NAC)
label variable mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"

* Blank out ages the person did not live to: rows matched to a death record
* whose year of death is earlier than birth year + age:
foreach dvar of varlist days_y?? {
    local yy = subinstr("`dvar'", "days_y", "", 1)
    local y  = real("`yy'")

    foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
        replace `v' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
    }
}

* Tag every outcome as birth/pregnancy coded by appending the _bcodes suffix:
foreach dvar of varlist days_y?? {
    local yy = subinstr("`dvar'", "days_y", "", 1)

    foreach stem in days nadmssn ndischrg {
        rename `stem'_y`yy' `stem'_y`yy'_bcodes
    }
}
rename total_discharges total_discharges_bcodes

* Append the rows set aside above. They carry only ID_RECIEN_NACIDO, NAC_*
* and mrg_EEHH2NAC, so all _bcodes outcomes and the mortality-merge
* variables are missing for them:
append using `non_women'


* Tidy up, sort by newborn ID, label, time-stamp, and write to disk.
* The data label is kept within the 80-character limit for dataset labels
* so that it is stored in full:
compress
sort ID_RECIEN_NACIDO
label data "Hospitalizations and days hospitalized coded as birth/pregnancy related, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_BCODES.dta", replace

*-------------------------
* 4.6 CHRONIC
*-------------------------
* Aggregates the record-level DAYS_Y file to one row per by-group, leaving
* out records with notext == 0. Outcomes are labelled Not External, suffixed
* _notext, and saved as DAYS_Y_AGG_NOTEXT.dta.

* Start from the record-level file:
use "${eehh_original}_DAYS_Y.dta", clear


* REFERENCE YEARS (stored as scalars):

* First and last birth year present in the data:
summarize NAC_$byear_var
scalar define min_ano_nac = r(min)
scalar define max_ano_nac = r(max)

* First and last EEHH year present in the data:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)



* On rows with notext == 0, days become missing and admission/discharge
* counts become 0, so they add nothing to the sums taken below.
* NOTE(review): rows with missing notext are left untouched here, whereas the
* isapre, birth_codes and abortion filters in this file also exclude missing
* flags. Confirm notext is never missing or that this difference is intended.
local is_ext "notext == 0"
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	replace `dvar' = . if `is_ext'
	foreach cnt in nadmssn ndischrg {
		replace `cnt'_y`yy' = 0 if `is_ext'
	}
}


* Stash the variable labels in locals so they can be re-applied once the
* data have been collapsed:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	local `v'_lbl : variable label `v'
}

* Sum every outcome within each by-group.
* NOTE(review): visit is summed over all rows of the group, including the
* notext == 0 ones, although the label below reads Not External. Confirm
* this is intended.
collapse (sum) days_y?? nadmssn_y?? ndischrg_y?? total_discharges = visit, ///
	by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Restore the stored labels, flagged as Not External:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	label variable `v' "``v'_lbl': Not External"
}
label variable total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh': Not External"

* Days: a total of 0 becomes missing, anything else is capped at 365:
foreach dvar of varlist days_y?? {
	replace `dvar' = cond(`dvar' == 0, ., min(`dvar', 365)) if `dvar' != .
}

* Blank out ages outside the EEHH coverage window, i.e. birth year + age
* earlier than the first EEHH year or later than the last one.
* scalar() makes sure the scalars are read, never a like-named variable:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
		replace `v' = . if NAC_ANO_NAC + `y' < scalar(min_ano_eehh)
		replace `v' = . if NAC_ANO_NAC + `y' > scalar(max_ano_eehh)
	}
}


* Bring in the year of death (ANO_DEF) from the mortality file; rows found
* only in the mortality file are not kept:
generate ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
	keepusing(ANO_DEF) keep(master match) generate(mrg_DEF2NAC)

label variable mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"

* Blank out ages the person did not live to: rows matched to a death record
* whose year of death is earlier than birth year + age:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
		replace `v' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	}
}

* Tag every outcome as not-external by appending the _notext suffix:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)

	foreach stem in days nadmssn ndischrg {
		rename `stem'_y`yy' `stem'_y`yy'_notext
	}
}
rename total_discharges total_discharges_notext


* Tidy up, sort by newborn ID, label, time-stamp, and write to disk.
* The data label is kept within the 80-character limit for dataset labels
* so that it is stored in full:
compress
sort ID_RECIEN_NACIDO
label data "Hospitalizations and days hospitalized with non-external conditions, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_NOTEXT.dta", replace

*-------------------------
* 4.7 ABORTIONS
*-------------------------
* Aggregates the record-level DAYS_Y file to one row per by-group for rows
* with NAC_SEXO == 2, counting only records whose abortion flag is neither 0
* nor missing. Every other newborn is appended back at the end with the
* _abrtn outcomes missing. Result is saved as DAYS_Y_AGG_ABORTIONS.dta.

* Start from the record-level file:
use "${eehh_original}_DAYS_Y.dta", clear


* REFERENCE YEARS (stored as scalars):

* First and last birth year present in the data:
summarize NAC_$byear_var
scalar define min_ano_nac = r(min)
scalar define max_ano_nac = r(max)

* First and last EEHH year present in the data:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)


* RESTRICT TO WOMEN (NAC_SEXO == 2)

* Temporary file holding everybody else:
tempfile non_women

* Set aside one identifying row for everyone whose NAC_SEXO is not 2
* (this includes missing NAC_SEXO); they are appended back further down:
preserve
drop if NAC_SEXO == 2
keep ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC
duplicates drop
compress
save `non_women', replace
restore

* Continue with women only:
drop if NAC_SEXO != 2

* On rows whose abortion flag is 0 or missing, days become missing and
* admission/discharge counts become 0, so they add nothing to the sums below:
local not_abrtn "abortion == 0 | abortion == ."
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	replace `dvar' = . if `not_abrtn'
	foreach cnt in nadmssn ndischrg {
		replace `cnt'_y`yy' = 0 if `not_abrtn'
	}
}



* Stash the variable labels in locals so they can be re-applied once the
* data have been collapsed:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	local `v'_lbl : variable label `v'
}

* Sum every outcome within each by-group.
* NOTE(review): visit is summed over all rows of the group, not only the
* abortion ones, although the label below reads Abortion. Confirm this is
* intended.
collapse (sum) days_y?? nadmssn_y?? ndischrg_y?? total_discharges = visit, ///
	by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Restore the stored labels, flagged as Abortion:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	label variable `v' "``v'_lbl': Abortion"
}
label variable total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh': Abortion"

* Days: a total of 0 becomes missing, anything else is capped at 365:
foreach dvar of varlist days_y?? {
	replace `dvar' = cond(`dvar' == 0, ., min(`dvar', 365)) if `dvar' != .
}

* Blank out ages outside the EEHH coverage window, i.e. birth year + age
* earlier than the first EEHH year or later than the last one:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
		replace `v' = . if NAC_ANO_NAC + `y' < scalar(min_ano_eehh)
		replace `v' = . if NAC_ANO_NAC + `y' > scalar(max_ano_eehh)
	}
}


* Bring in the year of death (ANO_DEF) from the mortality file; rows found
* only in the mortality file are not kept:
generate ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
	keepusing(ANO_DEF) keep(master match) generate(mrg_DEF2NAC)

label variable mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"

* Blank out ages the person did not live to: rows matched to a death record
* whose year of death is earlier than birth year + age:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
		replace `v' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	}
}

* Tag every outcome as abortion-coded by appending the _abrtn suffix:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)

	foreach stem in days nadmssn ndischrg {
		rename `stem'_y`yy' `stem'_y`yy'_abrtn
	}
}
rename total_discharges total_discharges_abrtn


* Append the rows set aside above. They carry only ID_RECIEN_NACIDO, NAC_*
* and mrg_EEHH2NAC, so all _abrtn outcomes and the mortality-merge
* variables are missing for them:
append using `non_women'


* Tidy up, sort by newborn ID, label, time-stamp, and write to disk:
compress
sort ID_RECIEN_NACIDO
label data "Number of hospitalizations and days hospitalized coded as abortions, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_ABORTIONS.dta", replace


*-----------------------
* 4.8 REPHEALTH
*-----------------------
* Aggregates the record-level DAYS_Y file to one row per by-group, leaving
* out records with rephlth == 0. Outcomes are labelled Reproductive Health,
* suffixed _rphlth, and saved as DAYS_Y_AGG_REPHEALTH.dta.

* Start from the record-level file:
use "${eehh_original}_DAYS_Y.dta", clear


* REFERENCE YEARS (stored as scalars):

* First and last birth year present in the data:
summarize NAC_$byear_var
scalar define min_ano_nac = r(min)
scalar define max_ano_nac = r(max)

* First and last EEHH year present in the data:
summarize $eyear_var
scalar define min_ano_eehh = r(min)
scalar define max_ano_eehh = r(max)



* On rows with rephlth == 0, days become missing and admission/discharge
* counts become 0, so they add nothing to the sums taken below.
* NOTE(review): rows with missing rephlth are left untouched here, whereas
* the isapre, birth_codes and abortion filters in this file also exclude
* missing flags. Confirm rephlth is never missing or that this difference is
* intended.
local not_rph "rephlth == 0"
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	replace `dvar' = . if `not_rph'
	foreach cnt in nadmssn ndischrg {
		replace `cnt'_y`yy' = 0 if `not_rph'
	}
}


* Stash the variable labels in locals so they can be re-applied once the
* data have been collapsed:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	local `v'_lbl : variable label `v'
}

* Sum every outcome within each by-group.
* NOTE(review): visit is summed over all rows of the group, including the
* rephlth == 0 ones, although the label below reads Reproductive Health.
* Confirm this is intended.
collapse (sum) days_y?? nadmssn_y?? ndischrg_y?? total_discharges = visit, ///
	by(ID_RECIEN_NACIDO NAC_* mrg_EEHH2NAC)

* Restore the stored labels, flagged as Reproductive Health:
foreach v of varlist days_y?? nadmssn_y?? ndischrg_y?? {
	label variable `v' "``v'_lbl': Reproductive Health"
}
label variable total_discharges "Total number of hospital discharges `=min_ano_eehh'-`=max_ano_eehh': Reproductive Health"

* Days: a total of 0 becomes missing, anything else is capped at 365:
foreach dvar of varlist days_y?? {
	replace `dvar' = cond(`dvar' == 0, ., min(`dvar', 365)) if `dvar' != .
}

* Blank out ages outside the EEHH coverage window, i.e. birth year + age
* earlier than the first EEHH year or later than the last one.
* scalar() makes sure the scalars are read, never a like-named variable:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
		replace `v' = . if NAC_ANO_NAC + `y' < scalar(min_ano_eehh)
		replace `v' = . if NAC_ANO_NAC + `y' > scalar(max_ano_eehh)
	}
}


* Bring in the year of death (ANO_DEF) from the mortality file; rows found
* only in the mortality file are not kept:
generate ID_FALLECIDO = ID_RECIEN_NACIDO
merge 1:1 ID_FALLECIDO using "DEF_1990_2018_NOGLOSAS_NODUPS_NONAS.dta", ///
	keepusing(ANO_DEF) keep(master match) generate(mrg_DEF2NAC)

label variable mrg_DEF2NAC "merge 1:1 ID_FALLECIDO using DEF, keep(master match)"

* Blank out ages the person did not live to: rows matched to a death record
* whose year of death is earlier than birth year + age:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)
	local y = real("`yy'")

	foreach v in `dvar' nadmssn_y`yy' ndischrg_y`yy' {
		replace `v' = . if mrg_DEF2NAC == 3 & ANO_DEF < NAC_ANO_NAC + `y'
	}
}

* Tag every outcome as reproductive-health by appending the _rphlth suffix:
foreach dvar of varlist days_y?? {
	local yy = subinstr("`dvar'", "days_y", "", 1)

	foreach stem in days nadmssn ndischrg {
		rename `stem'_y`yy' `stem'_y`yy'_rphlth
	}
}
rename total_discharges total_discharges_rphlth

* Tidy up, sort by newborn ID, label, time-stamp, and write to disk.
* The data label is kept within the 80-character limit for dataset labels
* so that it is stored in full:
compress
sort ID_RECIEN_NACIDO
label data "Hospitalizations and days hospitalized, reproductive health conditions, by age"
notes drop _dta
notes: Last modified on $S_DATE at $S_TIME
save "DAYS_Y_AGG_REPHEALTH.dta", replace

*---------------------------------------------------------------------------
* 4.X. In Months: not used in the MERGE file; besides, the corresponding
*      do-files do not exist.
*---------------------------------------------------------------------------
